import requests
from bs4 import BeautifulSoup

# Data fetching
def get_html(url, headers=None, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        headers: Optional mapping of HTTP request headers. When omitted, the
            module-level ``headers`` variable is used if one is defined;
            otherwise the request is sent with the default headers of
            ``requests``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    if headers is None:
        # Fall back to the script-level global so existing get_html(url)
        # callers keep working, without a NameError when it is absent.
        headers = globals().get("headers")
    # Without a timeout a stalled server would block this call forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    r.raise_for_status()
    # Decode as UTF-8 regardless of the charset the server declares.
    r.encoding = "utf-8"
    return r.text



# Data parsing
def parse_data(html_doc):
    """Print the title, column, source and time of each news list item.

    The document is parsed with the ``html.parser`` backend and every
    ``<li>`` under the news list is visited. For items containing at least
    one ``<span>``, the four fields are printed one per line; a separator
    line of 30 dashes is printed after every item.

    Args:
        html_doc: HTML markup of the page as a string.

    Returns:
        None. Results are written to standard output.
    """
    soup = BeautifulSoup(html_doc, "html.parser")
    li_list = soup.select(
        "body > div.roll_main > div.scroll_main > div.newslist > ul > li"
    )

    # Printed in this order: title, column, source, time.
    field_selectors = (
        "span.tit > a",
        "span.column >a",
        "span.source",
        "span.time",
    )

    for li in li_list:
        if li.select("span"):
            for selector in field_selectors:
                node = li.select_one(selector)
                # A missing field prints as None rather than raising
                # IndexError and aborting the whole run.
                print(node.string if node is not None else None)
        print("-" * 30)


# Data storage
def store_data():
    """Placeholder for data storage; not implemented yet and does nothing."""
    return None


if __name__ == "__main__":
    # Request headers; get_html reads this module-level name.
    headers = {
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.8",
        "Cache-Control": "max-age=0",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.50"
        ),
        "Connection": "keep-alive",
        "Referer": "https://news.sina.com.cn/",
    }
    # NOTE(review): the Referer above names a different site than the page
    # fetched below -- confirm this is intentional.
    url = "http://www.techweb.com.cn/roll/"

    # Download the page, then print the parsed news entries.
    parse_data(get_html(url))
